import requests,random,csv
import os,re,IPy,json,time,random
def top500web(URL):
    """Scrape one ranking page and append the links it lists to today's file.

    The page at ``URL`` is fetched with a randomly chosen browser User-Agent.
    Every ``<a target="_blank" href="...">`` target found in the response is
    printed and appended, one per line, to ``<YYYY-MM-DD>top500.txt`` in the
    current working directory. If the page contains no such link, the output
    file is not opened at all.

    Args:
        URL: Address of the page to download.

    Raises:
        Whatever ``requests.get`` raises is propagated unchanged (for example
        on a connection failure or when the 10-second timeout expires).
    """
    # Every entry needs its own trailing comma: adjacent string literals are
    # silently concatenated, which would produce one bogus User-Agent value.
    user_agents = [
        "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:30.0) Gecko/20100101 Firefox/30.0",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/537.75.14",
        "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)",
    ]
    headers = {"User-Agent": random.choice(user_agents)}

    response = requests.get(url=URL, headers=headers, timeout=10)
    # Force UTF-8 decoding instead of relying on the charset requests guesses.
    response.encoding = "utf8"

    # The capture group yields the href value directly, so no post-split is
    # needed to strip the leading tag text.
    links = re.findall(r'<a target="_blank" href="([^"]+)', response.text)
    if not links:
        # Nothing to record; leave the output file untouched.
        return

    # Open the dated output file once per page rather than once per link.
    out_name = time.strftime("%Y-%m-%d", time.localtime()) + 'top500.txt'
    with open(out_name, 'a') as f:
        for link in links:
            print(link)
            f.write(link + "\n")


if __name__ == '__main__':
    # Start today's output file afresh; its first line is the run timestamp.
    # (time.strftime formats the current local time when no tuple is given.)
    out_name = time.strftime("%Y-%m-%d") + 'top500.txt'
    with open(out_name, 'w') as out:
        out.write(time.strftime("%Y-%m-%d %H:%M:%S") + "\n")

    # Page 1 is served at the bare path; pages 2 through 29 append "/<n>".
    for page in range(1, 30):
        suffix = "" if page == 1 else "/" + str(page)
        top500web("http://www.alexa.cn/siterank%s" % suffix)
